<?php

namespace App\Http\Controllers;

use Illuminate\Http\Request;

use App\Http\Requests;
use App\Http\Controllers\Controller;
use DB;

class CaiController extends Controller
{
    /**
     * Scraper entry point.
     *
     * Fetches one hard-coded yougou.com list page, extracts the detail-page
     * URLs from it and imports every product found.
     *
     * Example URLs seen on the site (for reference when changing the target):
     *   category list:  http://www.yougou.com/f-0-0-04Y001-0.html  (about 20 pages)
     *   brand list:     http://www.yougou.com/f-nike-PTK-0-6.html
     *   brand list p.8: http://www.yougou.com/f-nike-PTK-0-6-8.html
     *
     * @return void
     */
    public function caiji()
    {
        // Scraping is slow; never let PHP's execution time limit kill the script.
        set_time_limit(0);

        $listurl = 'http://www.yougou.com/f-0-TFA_D74_QOS-0-0-2.html';

        // getList() always returns an array, so this loop is safe even when
        // the list page could not be downloaded.
        foreach ($this->getList($listurl) as $detailUrl) {
            $this->getDetail($detailUrl);
        }
    }

    /**
     * Scrape a list page and return the detail-page URLs it links to.
     *
     * @param  string $url  URL of the list page.
     * @return array        Matched href values; empty when the page could not
     *                      be fetched or contains no matching links.
     */
    public function getList($url)
    {
        $code = $this->fetchPage($url, [
            'Host: www.yougou.com',
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0',
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            // NOTE(review): "deflat" looks like a typo for "deflate". It is kept
            // as-is on purpose: no response decoding (CURLOPT_ENCODING) is
            // configured, so advertising a real encoding could yield a
            // compressed body that the regex below cannot read.
            'Accept-Encoding: deflat',
            'Referer: http://www.yougou.com/gentlemen-clothing.shtml',
            'Cookie: NTKF_PAGE_MANAGE=%7B%22m%22%3A%5B%7B%2282995%22%3A164364%7D%2C%7B%2252298%22%3A164358%7D%2C%7B%2289424%22%3A164367%7D%5D%2C%22t%22%3A%2222%3A33%3A56%22%7D; yg_sid=3606c32f-3afa-4b94-ab46-2dceb7611d71; admanageType=error; NTKF_CACHE_DATA=%7B%22debug%22%3A%7B%7D%2C%22uid%22%3A%22kf_9923_ISME9754_guestE21DAD91-5EFF-35%22%2C%22tid%22%3A%221480511468141000654%22%2C%22fsid%22%3A%221480511465937000126%22%7D; __utma=95907011.960244260.1480511466.1480511466.1480511466.1; __utmb=95907011.5.10.1480511466; __utmc=95907011; __utmz=95907011.1480511466.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __yga=%28direct%29.%28direct%29; NTKF_T2D_CLIENTID=guestE21DAD91-5EFF-3595-9A8F-B55C30D56B89; JSESSIONID=9F616D86503FBE6E5D45A04C7E7B54E4; Hm_lvt_bc66790de6f87c591da5936f04e03efb=1480511473; Hm_lpvt_bc66790de6f87c591da5936f04e03efb=1480514808; _pzfxuvpc=1480511473348%7C1457131903203131817%7C4%7C1480514808381%7C3%7C9385624833112389589%7C3821519633520019741; yg_history_goods=100499087; bdshare_firstime=1480513252495; vizNprd=2%7C1480599657727',
            'Connection: keep-alive',
            'Upgrade-Insecure-Requests: 1'
        ]);

        if ($code === false) {
            return [];
        }

        // Each product title sits in <span class='nptt'> wrapping a link to
        // its detail page; capture the href of that link.
        if (!preg_match_all("/<span class='nptt'>.* href='(.*)'.*<\/span>/isU", $code, $matches)) {
            return [];
        }

        return $matches[1];
    }

    /**
     * Scrape one product detail page and store it.
     *
     * Inserts a row into `shop_goods`, downloads every main image and inserts
     * one row per successfully saved image into `shop_goopics`. Pages that
     * cannot be fetched, or that lack a title or detail section, are skipped
     * instead of aborting the whole crawl.
     *
     * @param  string $url  URL of the detail page.
     * @return void
     */
    public function getDetail($url)
    {
        // Send browser-like request headers. (The previous code passed this
        // array to CURLOPT_RETURNTRANSFER, so the headers were never sent, and
        // then discarded the cURL response by re-downloading the page with
        // file_get_contents().)
        $code = $this->fetchPage($url, [
            'Host: www.yougou.com',
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0',
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Referer: http://www.yougou.com/f-0-TFA_D74_QOS-0-0.html',
            'Cookie: NTKF_PAGE_MANAGE=%7B%22m%22%3A%5B%7B%2248942%22%3A166729%7D%2C%7B%2289424%22%3A166735%7D%5D%2C%22t%22%3A%2222%3A37%3A53%22%7D; yg_sid=3606c32f-3afa-4b94-ab46-2dceb7611d71; admanageType=error; NTKF_CACHE_DATA=%7B%22debug%22%3A%7B%7D%2C%22uid%22%3A%22kf_9923_ISME9754_guestE21DAD91-5EFF-35%22%2C%22tid%22%3A%221480511468141000654%22%2C%22fsid%22%3A%221480511465937000126%22%7D; __utma=95907011.960244260.1480511466.1480511466.1480511466.1; __utmb=95907011.6.10.1480511466; __utmc=95907011; __utmz=95907011.1480511466.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __yga=%28direct%29.%28direct%29; NTKF_T2D_CLIENTID=guestE21DAD91-5EFF-3595-9A8F-B55C30D56B89; JSESSIONID=79D56D83E33CD1147E883BE7BA53BF33; Hm_lvt_bc66790de6f87c591da5936f04e03efb=1480511473; Hm_lpvt_bc66790de6f87c591da5936f04e03efb=1480516454; _pzfxuvpc=1480511473348%7C1457131903203131817%7C5%7C1480516453964%7C4%7C3821519633520019741%7C1036463093142711936; yg_history_goods=100499087; bdshare_firstime=1480513252495; vizNprd=2%7C1480599657727; _pzfxsvpc=1036463093142711936%7C1480516453961%7C1%7Chttp%3A%2F%2Fwww.yougou.com%2Fgentlemen-clothing.shtml',
            'Connection:close'
        ]);

        if ($code === false) {
            return;
        }

        // Title and detail HTML are required; skip the page when either is missing.
        if (!preg_match('/<h1>(.*)<\/h1>/', $code, $titleMatch)
            || !preg_match('/<div id="contentDetail">(.*)<\/div>/isU', $code, $detailMatch)) {
            return;
        }

        // Main image URLs (may legitimately be empty).
        preg_match_all('/picbigurl="(.*)"/isU', $code, $imageMatch);
        $imageUrls = isset($imageMatch[1]) ? $imageMatch[1] : [];

        // Insert the product row. insertGetId() returns the new primary key;
        // insert() only returns a boolean, which previously ended up being
        // stored as goods_id. Price, category and stock are random
        // placeholder values, not scraped data.
        // NOTE(review): assumes shop_goods has an auto-increment "id" column - confirm.
        $id = DB::table('shop_goods')->insertGetId([
                'title'=>$titleMatch[1],
                'price'=>rand(200,500),
                'cate_id'=>rand(1,7),
                'detail'=>$detailMatch[1],
                'kucun'=>rand(100,10000),
                'color'=>'黑色@@白色@@灰色@@蓝色@@红色',
                'size'=>'M@@L@@XL@@XLL'
            ]);

        if (!$id) {
            return;
        }

        // Download each image and collect the rows for the picture table.
        $data = [];
        foreach ($imageUrls as $imageUrl) {
            $path = $this->collectionImage($imageUrl);
            if ($path === false) {
                // Image could not be downloaded or saved; skip it.
                continue;
            }
            $data[] = [
                'goods_id' => $id,
                'path'     => $path,
            ];
        }

        if (!empty($data)) {
            DB::table('shop_goopics')->insert($data);
        }
    }

    /**
     * Download an image and save it under ./Uploads/<Ymd>/.
     *
     * @param  string $url  URL of the image.
     * @return string|false Saved path with the leading "." removed
     *                      (e.g. "/Uploads/20161130/1480511466123456.jpg"),
     *                      or false when the download or the write failed.
     */
    public function collectionImage($url){
        $info = file_get_contents($url);
        if ($info === false) {
            return false;
        }

        // One directory per day, relative to the current working directory.
        $dir = './Uploads/'. date('Ymd');
        // Create recursively so a missing ./Uploads parent is not an error;
        // re-check is_dir() in case the directory appeared in the meantime.
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            return false;
        }

        // Timestamp plus a random number keeps file names distinct.
        $fileName = time().rand(100000,999999);

        // Take the leading word characters of the URL's extension so that
        // anything trailing it (e.g. "jpg?x=1") is dropped.
        $extension = pathinfo($url, PATHINFO_EXTENSION);
        if (is_string($extension) && preg_match('/\w+/', $extension, $suffixMatch)) {
            $suffix = $suffixMatch[0];
        } else {
            // NOTE(review): URLs without an extension fall back to "jpg" - confirm
            // this suits the image types served by the site.
            $suffix = 'jpg';
        }

        $path = $dir.'/'.$fileName.'.'.$suffix;
        if (file_put_contents($path, $info) === false) {
            return false;
        }

        // Strip the leading "." so the result is a root-relative path.
        return ltrim($path, '.');
    }

    /**
     * Perform a GET request with cURL and return the response body.
     *
     * @param  string $url      URL to request.
     * @param  array  $headers  Raw request header lines to send.
     * @return string|false     Response body, or false when the request
     *                          failed or the body was empty.
     */
    private function fetchPage($url, array $headers)
    {
        $ch = curl_init($url);
        if ($ch === false) {
            return false;
        }

        // Return the body from curl_exec() instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

        $body = curl_exec($ch);
        // Always release the handle; the crawl issues many requests.
        curl_close($ch);

        if (!is_string($body) || $body === '') {
            return false;
        }

        return $body;
    }

}
